---
title: "Replication Materials for Unveiled"
author: "Aengus Bridgman, Costin Ciobanu, Aaron Erlich"
date: "30/01/2020"
output: html_document
editor_options: 
  chunk_output_type: console
---


```{r}

#packrat::bundle()
library(fastDummies)
library(tidyverse)
library(zoo)

# Negated membership operator: for each element of `x`, TRUE when it does not
# occur in `y` (the element-wise complement of `%in%`).
`%nin%` <- function(x, y) {
  !(x %in% y)
}

# Output directory for every saveRDS() call in this file. `recursive = TRUE`
# also creates a missing parent, and `showWarnings = FALSE` keeps re-runs quiet
# when the directory already exists.
dir.create("data/ready", recursive = TRUE, showWarnings = FALSE)

```

# Media

```{r}
# Load the raw media file and flag each row's market in `quebec`.
# Rules are evaluated top to bottom, so the publication-specific overrides win
# over the language fallback: The Gazette and Sherbrooke Record are coded 1,
# Le Droit is coded 0, and every other row is coded 1 for "french" and 0 for
# "english". Rows matching no rule are left NA.
media <- read_csv("data/raw/media.csv") %>%
  mutate(
    quebec = case_when(
      publication %in% c("The Gazette", "Sherbrooke Record") ~ 1,
      publication == "Le Droit" ~ 0,
      language == "french" ~ 1,
      language == "english" ~ 0
    )
  )

# Count rows per date x issue x quebec, with explicit zeros for combinations
# that never occur, then spread to one row per date and one column per
# "<issue>_<quebec>" pair.
media <- media %>%
  mutate(n = 1) %>%
  # complete() adds every missing date/issue/quebec combination with n = 0.
  tidyr::complete(date, issue, quebec, fill = list(n = 0)) %>%
  group_by(date, issue, quebec) %>%
  summarise(n = sum(n)) %>%
  ungroup() %>%
  # Build the wide-column key and keep only what spread() needs.
  transmute(date, var = paste(issue, quebec, sep = "_"), n) %>%
  spread(key = var, value = n)

# Reshape the wide "<issue>_<quebec>" counts into one row per date x quebec
# with one column per issue, then derive the coverage measures and save them.
#
# Columns added:
#   niqab_num / econo_num    raw counts
#   niqab / economy          counts divided by 10 (quebec == 1) or 30 (otherwise)
#   *_p                      each day's share of that market's total
#   cum_*                    running totals within market
#   *_7 / *_5                trailing 7- and 5-row sums within market
#   niqab_saturation         niqab_7 / econo_7
media <- media %>%
  gather(key = "key", value = "n", -date) %>%
  # separate() returns `quebec` as character ("0"/"1"); the `quebec == 1`
  # comparisons below rely on R coercing the number to a string.
  separate(key, into = c("issue", "quebec"), sep = "_") %>%
  spread(key = "issue", value = "n") %>%
  mutate(
    niqab_num = niqab,
    econo_num = economy,
    # Original note: number per journal, 10 french-language and 30 english.
    # NOTE(review): the divisor is picked by the `quebec` flag, not by
    # language -- confirm the outlet counts match that grouping.
    niqab = ifelse(quebec == 1, niqab / 10, niqab / 30),
    economy = ifelse(quebec == 1, economy / 10, economy / 30)
  ) %>%
  # cumsum() and rollapplyr() walk the rows in order, so state the
  # chronological ordering explicitly instead of inheriting it from spread().
  arrange(date, quebec) %>%
  group_by(quebec) %>%
  mutate(
    niqab_p = niqab / sum(niqab),
    econo_p = economy / sum(economy),
    cum_niqab = cumsum(niqab),
    cum_econo = cumsum(economy),
    cum_niqab_p = cumsum(niqab_p),
    cum_econo_p = cumsum(econo_p),
    # rollapplyr() is right-aligned by definition; partial = TRUE lets the
    # first rows use however many observations are available so far.
    niqab_7 = rollapplyr(niqab, width = 7, FUN = sum, partial = TRUE),
    niqab_num_7 = rollapplyr(niqab_num, width = 7, FUN = sum, partial = TRUE),
    econo_7 = rollapplyr(economy, width = 7, FUN = sum, partial = TRUE),
    econo_num_7 = rollapplyr(econo_num, width = 7, FUN = sum, partial = TRUE),
    niqab_p_7 = rollapplyr(niqab_p, width = 7, FUN = sum, partial = TRUE),
    econo_p_7 = rollapplyr(econo_p, width = 7, FUN = sum, partial = TRUE),
    niqab_p_5 = rollapplyr(niqab_p, width = 5, FUN = sum, partial = TRUE),
    econo_p_5 = rollapplyr(econo_p, width = 5, FUN = sum, partial = TRUE),
    # Not finite whenever the trailing economy sum is zero.
    niqab_saturation = niqab_7 / econo_7
  ) %>%
  # Drop the grouping so the saved object is a plain, ungrouped table.
  ungroup() %>%
  # The shares and running totals above use all dates; only then is the
  # series cut off at 2015-10-18.
  filter(date <= as.Date("2015-10-18"))

saveRDS(media, file = "data/ready/media.rds")

```

# Media sentiment

```{r}

# Load the media sentiment file and add, for the LPC and the NDP:
#   prop_positive_* / prop_negative_*  share of positive (negative) mentions
#                                      among positive + negative mentions
#   lib_net_tone / ndp_net_tone        log ratio of the two shares, each offset
#                                      by 0.05 so a zero share stays finite
media_sentiment <- read_csv("data/raw/media_sentiment.csv") %>%
  mutate(
    prop_positive_lpc = positive_lpc / (positive_lpc + negative_lpc),
    prop_negative_lpc = negative_lpc / (positive_lpc + negative_lpc),
    prop_positive_ndp = positive_ndp / (positive_ndp + negative_ndp),
    prop_negative_ndp = negative_ndp / (positive_ndp + negative_ndp)
  ) %>%
  mutate(
    lib_net_tone = log((prop_positive_lpc + 0.05) / (prop_negative_lpc + 0.05)),
    ndp_net_tone = log((prop_positive_ndp + 0.05) / (prop_negative_ndp + 0.05))
  )

saveRDS(media_sentiment, "data/ready/media_sentiment.rds")

```


# CES

```{r}

# Data source: https://ces-eec.arts.ubc.ca/english-section/surveys/
# The .RData file restores an object named `CES2015_Combined`; keep it under
# the shorter name `CES` and blank out the original binding.
load("data/raw/CES2015_Combined_R.RData")
CES <- CES2015_Combined
CES2015_Combined <- NULL

# Recode one feeling-thermometer item: the non-response codes used in this
# file (996/998/999 and 1000) become NA, every other value passes through.
# NOTE(review): assumes thermometers run 0-100, so none of these codes can be
# a valid score -- confirm against the CES codebook.
recode_thermometer <- function(x) {
  ifelse(x %in% c(996, 998, 999, 1000), NA, x)
}

# Recode the CES 2015 variables into analysis variables: geography,
# demographics, vote intention, party identification, political knowledge,
# campaign exposure, feeling thermometers, 2011 vote, campaign timing and
# reported vote choice. Numeric codes not listed in a recode fall through to NA
# unless a rule says otherwise.
CES <- CES %>% 
  mutate(
    discard = discard,  # self-assignment; leaves the column unchanged
    web = ifelse(mode == 1, 1, 0),
    prov = factor(case_when(
      province == 59 ~ "BC",
      province == 48 ~ "AB",
      province == 47 ~ "SK",
      province == 46 ~ "MB",
      province == 35 ~ "ON",
      province == 24 ~ "QC",
      province == 13 ~ "NB",
      province == 12 ~ "NS",
      province == 11 ~ "PEI",
      province == 10 ~ "NL",
      province %in% c(60, 61, 62, 1000) ~ NA_character_)
    ),
    quebec = ifelse(prov == "QC", 1, 0),
    
    # Region: QC, ON, Prairies (AB/SK/MB), Atlantic (PEI/NS/NL/NB), BC
    region = factor(case_when(
      prov %in% "QC" ~ 'QC',
      prov %in% "ON" ~ 'ON',
      prov %in% c("AB", "SK", "MB") ~ 'PR',
      prov %in% c('PEI', 'NS', 'NL', 'NB') ~ 'AT',
      prov == "BC" ~ 'BC',
      TRUE ~ NA_character_)),
    
    date = as.Date(date, format = "%m/%d/%Y"),
    female = ifelse(sex_r == 1, 0, 1),
    
    # Interest: codes 98 and 99 become NA
    interest = case_when(
      interest == 98 ~ as.double(NA),
      interest == 99 ~ as.double(NA),
      TRUE ~ as.numeric(interest)),
    
    # Vote intention: use vote_for, falling back to the leaning item
    # vote_for_if where vote_for is missing
    vote_for_all_parties = coalesce(vote_for, vote_for_if),
    vote_for_all_parties = factor(case_when(
      vote_for_all_parties == 1 ~ "lib",
      vote_for_all_parties == 2 ~ "con",
      vote_for_all_parties == 3 ~ "ndp",
      vote_for_all_parties == 4 ~ "bq",
      vote_for_all_parties == 5 ~ "green",
      vote_for_all_parties %in% c(0, 8) ~ "other",
      vote_for_all_parties %in% c(7) ~ "wont",
      vote_for_all_parties %in% c(9, 98) ~ "undecided",
      TRUE ~ NA_character_)),
    ndp_vote = ifelse(vote_for_all_parties == "ndp", 1, 0),
    lib_vote = ifelse(vote_for_all_parties == "lib", 1, 0),
    con_vote = ifelse(vote_for_all_parties == "con", 1, 0),
    blq_vote = ifelse(vote_for_all_parties == "bq", 1, 0),
    undecided_vote = ifelse(vote_for_all_parties == "undecided", 1, 0),
    
    # Party identification. Don't know and refused are treated as NAs
    pid = factor(case_when(
      partyid == 1 ~ "lib",
      partyid == 2 ~ "con",
      partyid == 3 ~ "ndp",
      partyid == 4 ~ "bq",
      partyid == 5 ~ "green",
      partyid %in% c(0, 6, 8) ~ "other",
      TRUE ~ NA_character_)),
    ndp_pid = ifelse(pid == "ndp", 1, 0),
    lib_pid = ifelse(pid == "lib", 1, 0),
    con_pid = ifelse(pid == "con", 1, 0),
    blq_pid = ifelse(pid == "bq", 1, 0),
    no_pid = ifelse(pid == "other", 1, 0),
    
    # Strength of identification, reversed so larger = stronger; codes 8 and 9
    # are pooled with the weakest category
    pid_strength = case_when(
      pid_str == 1 ~ 3,
      pid_str == 3 ~ 2,
      pid_str == 5 ~ 1,
      pid_str %in% c(8, 9) ~ 1
    ),
    
    # Party identification from the p_* items; either item can assign a party,
    # and the first matching rule wins when the two disagree
    p_pid = factor(case_when(
      p_pidident == 1 | p_pidthink == 1 ~ "lib",
      p_pidident == 2 | p_pidthink == 2 ~ "con",
      p_pidident == 3 | p_pidthink == 3 ~ "ndp",
      p_pidident == 4 | p_pidthink == 4 ~ "bq",
      p_pidident == 5 | p_pidthink == 5 ~ "green",
      p_pidident %in% c(0, 6, 8) | p_pidthink %in% c(0, 6, 8) ~ "other",
      TRUE ~ NA_character_)),
    p_ndp_pid = ifelse(p_pid == "ndp", 1, 0),
    p_lib_pid = ifelse(p_pid == "lib", 1, 0),
    p_con_pid = ifelse(p_pid == "con", 1, 0),
    p_blq_pid = ifelse(p_pid == "bq", 1, 0),
    p_no_pid = ifelse(p_pid == "other", 1, 0),
    
    p_pid_strength = case_when(
      p_pidstr == 1 ~ 3,
      p_pidstr == 3 ~ 2,
      p_pidstr == 5 ~ 1,
      p_pidstr %in% c(8, 9) ~ 1
    ),
    
    # Age in 2015: the raw `age` is subtracted from 2015 after the listed
    # codes are set to NA
    age = case_when(
      age %in% c(1000, 1900, 1901, 9998, 9999) ~ as.double(NA),
      TRUE ~ 2015 - as.numeric(age)),
    
    # Education collapsed to four ordered categories
    education = as.factor(case_when(
      education %in% c(1, 2, 3, 4) ~ 1,
      education %in% c(5, 6, 8) ~ 2,
      education %in% c(7, 9) ~ 3,
      education %in% c(10, 11) ~ 4,
      TRUE ~ as.double(NA))),
    education = factor(education, 
                       labels = c("No HS", "HS", "Bachelor", "Graduate"),
                       levels = c(1, 2, 3, 4)),
    
    no_hs = ifelse(education == "No HS", 1, 0),
    hs = ifelse(education == "HS", 1, 0),
    bach = ifelse(education == "Bachelor", 1, 0),
    grad = ifelse(education == "Graduate", 1, 0),
    
    # Religion
    religion2 = factor(case_when(
      religion %in% c(0, 24, 98) ~ 'Atheist/Agnostic',
      religion %in% c(1, 2, 4, 5, 9, 10, 12, 13, 14, 16, 17, 18, 19, 20) ~ 'Christian',
      religion %in% c(3, 6, 7, 8, 15, 97) ~ 'Other',
      religion == 11 ~ 'Muslim',
      TRUE ~ as.character(NA))),
    
    # Employment status. The three indicators are not mutually exclusive:
    # code 9 sets both working and student, code 11 both working and retired.
    working = case_when(
      emp_status %in% c(1, 2, 9, 10, 11) ~ 1,
      emp_status %in% c(0, 3, 4, 5, 6, 7, 8) ~ 0,
      TRUE ~ NA_real_),
    student = case_when(
      emp_status %in% c(5, 9) ~ 1,
      emp_status %in% c(0, 1, 2, 3, 4, 6, 7, 8, 10, 11) ~ 0,
      TRUE ~ NA_real_),
    retired = case_when(
      emp_status %in% c(3, 11) ~ 1,
      emp_status %in% c(0, 1, 2, 4, 5, 6, 7, 8, 9, 10) ~ 0,
      TRUE ~ NA_real_),
    
    # Income, five labelled brackets; code 1000 becomes NA
    income_full = factor(ifelse(income_full %in% c(1000), NA, income_full)),
    income_full = factor(income_full,
                         labels = c("Less than $29,999",
                                    "$30,000 - $59,999",
                                    "$60,000 - $89,999",
                                    "$90,000 - $109,999",
                                    "More than $110,000"),
                         levels = c(1, 2, 3, 4, 5)),
    
    # Knowledge items, each coded 1/0/NA
    finance_minister = case_when(
      know_finmin %in% c(1, 3) ~ 1,
      know_finmin %in% c(5, 8) ~ 0,
      TRUE ~ as.double(NA)
    ),
    # NOTE(review): `gg` is coded from know_finmin, so it is an exact copy of
    # finance_minister and that item is counted twice in any sum over these
    # columns. It presumably should read the governor-general knowledge item;
    # the source column name is not visible here -- confirm and replace.
    gg  = case_when(
      know_finmin %in% c(1, 3) ~ 1,
      know_finmin %in% c(5, 8) ~ 0,
      TRUE ~ as.double(NA)
    ),
    premier = case_when(
      know_provpm %in% c(1, 3, 5, 8, 10, 11, 12, 13, 24, 35, 46, 47, 48, 59) ~ 1,
      know_provpm %in% c(97, 98) ~ 0,
      TRUE ~ as.double(NA)
    ),
    putin = case_when(
      know_putin %in% c(1, 3) ~ 1,
      know_putin %in% c(5, 8) ~ 0,
      TRUE ~ as.double(NA)
    ),
    
    # Use p_pos_musl, falling back to p_like_musl where it is missing
    p_musl = ifelse(is.na(p_pos_musl), p_like_musl, p_pos_musl),

    # Desire to ban the niqab: code 1 -> 1, code 5 -> 0, anything else NA
    p_ban_niqab = case_when(
      p_banniqab %in% 5 ~ 0,
      p_banniqab %in% 1 ~ 1,
      TRUE ~ as.double(NA)),
    
    # Other (self-assignments; columns are left unchanged)
    isslist_sena = isslist_sena,
    isscare_sena = isscare_sena, 
    
    # Measures of exposure to the campaign
    p_media_exposure = case_when(
      p_mediatim == 1 ~ 0,
      p_mediatim == 2 ~ 1,
      p_mediatim == 3 ~ 2,
      p_mediatim == 4 ~ 3,
      p_mediatim == 5 ~ 4,
      p_mediatim == 6 ~ 5,
      TRUE ~ as.double(NA)),
    ad_exposure = case_when(
      ads == 1 ~ 2,
      ads == 2 ~ 1,
      ads == 3 ~ 0,
      TRUE ~ as.double(NA)),

    # Feeling thermometers (parties and leaders). Every item is masked on its
    # own non-response codes. The leader items previously tested
    # `chnc_ndp == 1000`, an unrelated variable, so their own missing codes
    # were kept as scores and valid scores were dropped whenever chnc_ndp was
    # missing.
    feel_con = recode_thermometer(ptfeel_cons),
    feel_lib = recode_thermometer(ptfeel_libs),
    feel_ndp = recode_thermometer(ptfeel_ndp),
    feel_blq = recode_thermometer(ptfeel_bq),
    feel_hrpr = recode_thermometer(ldrfeel_hrpr),
    feel_trud = recode_thermometer(ldrfeel_trud),
    feel_mulc = recode_thermometer(ldrfeel_mulc),
    feel_ducp = recode_thermometer(ldrfeel_ducp),
    
    # Vote choice in 2011
    voted_2011 = case_when(
      vote_2011 == 1 ~ "lib",
      vote_2011 == 2 ~ "con",
      vote_2011 == 3 ~ "ndp",
      vote_2011 == 4 ~ "bq",
      vote_2011 == 5 ~ "gre",
      vote_2011 == 7 ~ "spoiled",
      TRUE ~ NA_character_),
    vote_2011_ndp = ifelse(voted_2011 == "ndp", 1, 0),
    vote_2011_lib = ifelse(voted_2011 == "lib", 1, 0),
    vote_2011_con = ifelse(voted_2011 == "con", 1, 0),
    
    # French indicator: 1 when first_lang == 5, otherwise 0 (missing
    # first_lang is also coded 0)
    french = as.numeric(case_when(first_lang == 5 ~ 1, 
                                  TRUE ~ as.double(0))),

    # Campaign timing. `court` switches on for interviews after 2015-09-15;
    # `court_linear` counts days since 2015-09-14 (day number 16692 since
    # 1970-01-01) for those interviews and is 0 before.
    court = ifelse(date > "2015-09-15", 1, 0),
    court_linear = ifelse(date > "2015-09-15", as.numeric(date - 16692), 0),
    last_2week_dummy = ifelse(date > "2015-10-05", 1, 0),
    last_week_dummy = ifelse(date > "2015-10-12", 1, 0),
    # Week and day counters start at 1 on the earliest interview date
    week_of_campaign = (as.numeric(date) - min(as.numeric(date), na.rm = TRUE) + 7) %/% 7,
    day_of_campaign = (as.numeric(date) - min(as.numeric(date), na.rm = TRUE)) + 1,
    group = week_of_campaign,

    # Vote choice from p_votechce, same coding as the vote intention above
    voted_for_all_parties = factor(case_when(
      p_votechce == 1 ~ "lib",
      p_votechce == 2 ~ "con",
      p_votechce == 3 ~ "ndp",
      p_votechce == 4 ~ "bq",
      p_votechce == 5 ~ "green",
      p_votechce %in% c(0, 8) ~ "other",
      p_votechce %in% c(7) ~ "wont",
      p_votechce %in% c(9, 98) ~ "undecided",
      TRUE ~ NA_character_)),
    ndp_voted = ifelse(voted_for_all_parties == "ndp", 1, 0),
    lib_voted = ifelse(voted_for_all_parties == "lib", 1, 0),
    con_voted = ifelse(voted_for_all_parties == "con", 1, 0),
    bqc_voted = ifelse(voted_for_all_parties == "bq", 1, 0),

    # NDP switchers: -1 intended NDP but voted otherwise, 1 the reverse,
    # 0 for everyone else (including rows where either measure is NA)
    ndp_switchers_neg = ifelse(ndp_vote == 1 & ndp_voted == 0, 1, 0),
    ndp_switchers_pos = ifelse(ndp_vote == 0 & ndp_voted == 1, 1, 0),
    ndp_switchers = case_when(ndp_switchers_neg == 1 ~ -1,
                              ndp_switchers_pos == 1 ~ 1,
                              TRUE ~ as.double(0)))

# One indicator column per level of `group` and of `region`.
CES <- CES %>%
  dummy_cols(select_columns = "group") %>%
  dummy_cols(select_columns = "region")

# Knowledge score: row-wise sum over the columns from finance_minister through
# putin, with missing items contributing 0.
CES <- CES %>%
  mutate(knowledge = rowSums(dplyr::select(., finance_minister:putin), na.rm = TRUE))

# Attach the media measures; merge() keeps only rows whose date and quebec
# value appear in both tables.
CES <- merge(x = CES, y = media, by = c("date", "quebec"))

saveRDS(CES, file = "data/ready/CES.rds")

```

# LPP

```{r}

# Data source:
# https://dataverse.harvard.edu/dataset.xhtml?persistentId=doi:10.7910/DVN/DACHKP#
# The .RData file restores an object named `x`; keep it as `LPP` and blank out
# the generic name.
load("data/raw/LPP2015.RData")
LPP <- x
x <- NULL

# Recode the LPP 2015 variables into the analysis variables. transmute() keeps
# only the columns created here.
LPP <- LPP %>%
  transmute(
    # Vote intention from `firstchoice`; codes 6 and 7 are pooled as undecided
    vote_for_all_parties = firstchoice,
    vote_for_all_parties = factor(case_when(
      vote_for_all_parties == 1 ~ "lib",
      vote_for_all_parties == 2 ~ "con",
      vote_for_all_parties == 3 ~ "ndp",
      vote_for_all_parties == 4 ~ "bq",
      vote_for_all_parties == 5 ~ "green",
      vote_for_all_parties %in% c(6, 7) ~ "undecided",
      TRUE ~ NA_character_)),
    # `enddate` is parsed as an EST timestamp and reduced to a calendar date.
    # NOTE(review): as.Date() on a POSIXct has historically defaulted to
    # tz = "UTC", which can push late-evening EST timestamps to the next day
    # -- confirm this is intended.
    date = as.Date(as.POSIXct(as.character(enddate), 'EST')),
    male = ifelse(gender == 1, 1, 0),
    age = as.numeric(age),
    female = ifelse(male == 1, 0, 1),
    french = ifelse(language == 2, 1, 0),
    # Region: QC, ON, Prairies (AB/SK/MB), Atlantic, BC. The Prairie set used
    # to list "Manitoba" twice and omit "Alberta", which left Alberta
    # respondents with region = NA.
    region = factor(case_when(
      province %in% "Quebec" ~ 'QC',
      province %in% "Ontario" ~ 'ON',
      province %in% c("Alberta", "Saskatchewan", "Manitoba") ~ 'PR',
      province %in% c("Nova Scotia", "Newfoundland and Labrador", "New Brunswick", "Prince Edward Island") ~ 'AT',
      province == "British Columbia" ~ 'BC',
      TRUE ~ NA_character_)),
    quebec = ifelse(region == "QC", 1, 0),
    working = ifelse(employment %in% c(1, 2, 7, 8), 1, 0), 
    student = ifelse(employment == 10, 1, 0), 
    retired = ifelse(employment == 9, 1, 0),
    # Education collapsed to four ordered categories
    education = as.factor(case_when(
      education %in% c(1) ~ 1,
      education %in% c(2) ~ 2,
      education %in% c(3, 4, 5) ~ 3,
      education %in% c(6, 7, 8) ~ 4,
      TRUE ~ as.double(NA))),
    education = factor(education, 
                       labels = c("No HS", "HS", "Bachelor", "Graduate"),
                       levels = c(1, 2, 3, 4)),
    
    ndp_vote = ifelse(vote_for_all_parties == "ndp", 1, 0),
    # 2011 NDP vote: code 3 -> 1, code 9 -> NA, everything else (including a
    # missing vote2011) -> 0
    vote_2011_ndp = case_when(vote2011 == 3 ~ as.double(1),
                              vote2011 == 9 ~ as.double(NA),
                              TRUE ~ as.double(0)),
    
    # `court` switches on for responses after 2015-09-15; `court_linear`
    # counts days since 2015-09-14 (day number 16692 since 1970-01-01) for
    # those responses and is 0 before.
    court = ifelse(date > "2015-09-15", 1, 0),
    court_linear = ifelse(date > "2015-09-15", as.numeric(date - 16692), 0),
    # Day counter starts at 1 on the earliest response date
    day_of_campaign = (as.numeric(date) - min(as.numeric(date), na.rm = TRUE)) + 1)

# One indicator column per level of `region`.
LPP <- LPP %>%
  dummy_cols(select_columns = "region")

# Attach the media measures for the matching date and market (quebec = 1 or 0);
# merge() keeps only rows whose date and quebec value appear in both tables.
LPP <- merge(x = LPP, y = media, by = c("date", "quebec"))

saveRDS(LPP, file = "data/ready/LPP.rds")

```
